This assignment aims to analyze the policing dataset by using advanced graphics and interactive plots. I have received two datasets - ‘crime23.csv’ and ‘temp2023.csv’.
The ‘crime23’ dataset contains information about crimes that occurred in Colchester in 2023. The dataset provides details such as the type and location of the crime, the date of the crime in the format of year and month (Y-m), and the status of the offence outcome.
The ‘temp2023’ dataset contains daily climate data collected from a weather station near Colchester. This dataset provides information on various measures such as temperature, wind speed, pressure, and visibility.
# Load the two inputs: the 2023 Colchester crime records and the daily
# readings from the nearby weather station.
temp2023 <- read.csv(file = "temp2023.csv")
crime23 <- read.csv(file = "crime23.csv")
The frequency table below shows the number of crimes of each type recorded in Colchester in 2023, including shoplifting, anti-social behaviour, public order offences, and others. Categories are listed from most to least frequent. There are 14 categories of crime; violent crime is the most frequent, with 2633 cases, followed by anti-social behaviour with 677 cases. The least frequent is possession of weapons, with only 74 cases.
# Cross-tabulate crime category against month. with() evaluates the call
# inside crime23 without attach(), which would leave every column on the
# search path where it can shadow, or be shadowed by, same-named objects.
ttable <- with(crime23, table(category, date))

# One-way frequency table of crime categories, most frequent first.
# (The object keeps its original name even though it has a single dimension.)
two_way_table <- sort(with(crime23, table(category)), decreasing = TRUE)

knitr::kable(
  two_way_table,
  caption = "Colchester Crime Frequency in 2023",
  col.names = c("Crime", "Frequency")
)
| Crime | Frequency |
|---|---|
| violent-crime | 2633 |
| anti-social-behaviour | 677 |
| criminal-damage-arson | 581 |
| shoplifting | 554 |
| public-order | 532 |
| other-theft | 491 |
| vehicle-crime | 406 |
| bicycle-theft | 235 |
| burglary | 225 |
| drugs | 208 |
| robbery | 94 |
| other-crime | 92 |
| theft-from-the-person | 76 |
| possession-of-weapons | 74 |
The following bar plot shows the number of crimes reported in each month of 2023. Bars are shaded with a colour gradient according to the number of reported offences: darker blue indicates more reported cases and lighter blue fewer. The plot reveals that January and September had the highest numbers of reported offences in 2023.
# Tally offences per month (all categories) and reshape the tally into a
# two-column data frame for plotting.
CrimeCounts <- table(crime23$date)
CrimeOffenceCounts <- setNames(
  as.data.frame(CrimeCounts),
  c("Month", "CrimeCounts")
)

# Bar chart of monthly totals; bar shade follows the monthly count.
# Note: despite its name, this object covers every crime category.
violent_crime_per_month <- ggplot(
  data = CrimeOffenceCounts,
  mapping = aes(x = Month, y = CrimeCounts, fill = CrimeCounts)
) +
  geom_col(width = 0.7) +
  scale_fill_gradient(low = "lightblue", high = "darkblue") +
  ggtitle("Number of Reported Offences per Month in 2023") +
  xlab("Month") +
  ylab("Number of Offences") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
violent_crime_per_month
# Count offences per street and rank streets from most to fewest offences.
frequency_by_street <- crime23 %>%
  group_by(street_name) %>%
  summarise(total_offences = n()) %>%
  arrange(desc(total_offences)) %>%
  # NOTE(review): this keeps ranks 2-11, i.e. the top-ranked street is
  # dropped. Presumably the first row is a generic/unnamed location; confirm
  # this is intentional, otherwise use slice(1:10).
  slice(2:11)

# Horizontal bar chart of the selected streets. street_name is converted to
# a factor ordered by total_offences for plotting only; a plain character
# axis would be drawn alphabetically and discard the ranking computed above.
# After coord_flip() the street with the most offences is at the top.
Offences_by_street <- ggplot(
  data = mutate(
    frequency_by_street,
    street_name = reorder(street_name, total_offences)
  ),
  aes(x = street_name, y = total_offences)
) +
  geom_bar(stat = "identity", fill = "skyblue") +
  labs(
    title = "Top 10 Frequency of Offenses by Street Name",
    x = "Street Name",
    y = "Total Offenses"
  ) +
  theme(axis.text.x = element_text(vjust = 0.1, hjust = 1)) +
  coord_flip()

# Render as an interactive plotly widget.
ggplotly(Offences_by_street)
The pie chart below displays the distribution of outcomes for all crimes, excluding records with no recorded outcome status. According to the chart, in 42.83% of cases the investigation was completed with no suspect identified.
# Count offences per outcome status, ignoring records with no outcome.
# The data is not restricted to any crime category.
CrimeOutcome <- crime23 %>%
  filter(!is.na(outcome_status)) %>%
  group_by(outcome_status) %>%
  summarize(count = n())

# Share of each outcome as a percentage of all records with an outcome.
TotalCount <- sum(CrimeOutcome$count)
OutcomeDistribution <- CrimeOutcome %>%
  mutate(Percentage = round((count / TotalCount) * 100, digits = 2))

# Interactive pie chart. The title previously said "Violent-Crimes" although
# every category is included, so it now says "All Crimes".
plot_ly(
  OutcomeDistribution,
  labels = ~outcome_status,
  values = ~Percentage,
  type = "pie",
  marker = list(colors = essex_palette)
) %>%
  layout(
    title = "Percentage Distribution of Outcomes For All Crimes",
    showlegend = TRUE
  )
The density plot below shows the distribution of monthly offence counts for all reported offences.
# Number of reported offences in each month (one row per month).
crime23_year <- summarize(group_by(crime23, date), count = n())

# Kernel density estimate of the monthly offence counts.
ggplot(data = crime23_year, mapping = aes(x = count)) +
  theme_minimal() +
  geom_density(fill = "#AB274F", color = "black", alpha = 0.7) +
  ggtitle("Density Plot of All Reported Incidents") +
  xlab("Offence Counts") +
  ylab("Density")
# Split the year into meteorological seasons using the "YYYY-MM" date column.
# Winter combines December with January and February of the same year,
# because only 2023 data is used here.
crime23_spring <- crime23 %>% filter(date %in% c("2023-03", "2023-04", "2023-05"))
crime23_summer <- crime23 %>% filter(date %in% c("2023-06", "2023-07", "2023-08"))
crime23_autumn <- crime23 %>% filter(date %in% c("2023-09", "2023-10", "2023-11"))
crime23_winter <- crime23 %>% filter(date %in% c("2023-12", "2023-01", "2023-02"))

# Stack the four subsets, tagging each row with its season.
combine_seasons <- rbind(
  mutate(crime23_spring, Season = "Spring"),
  mutate(crime23_summer, Season = "Summer"),
  mutate(crime23_autumn, Season = "Autumn"),
  mutate(crime23_winter, Season = "Winter")
)

# Offence count per season and crime category. The column is called `avg`
# for compatibility with the code that reads it, but it holds a count (n()),
# not a mean. .groups = "drop" returns an ungrouped result and avoids the
# "summarise() has grouped output" message.
season_crime_freq <- combine_seasons %>%
  group_by(Season, category) %>%
  summarize(avg = n(), .groups = "drop")
## `summarise()` has grouped output by 'Season'. You can override using the
## `.groups` argument.
This boxplot displays, for each season, the distribution of the number of offences per crime category. Each season is represented by a different coloured box.
# One box per season summarising the per-category offence counts held in
# season_crime_freq$avg.
Seasonal_boxplot <- ggplot(
  data = season_crime_freq,
  mapping = aes(x = Season, y = avg, fill = Season)
) +
  geom_boxplot() +
  xlab("Seasons") +
  ylab("Offence rate") +
  ggtitle("Crime rates among different seasons")

# Display the plot with the project colour palette applied to the boxes.
Seasonal_boxplot + scale_fill_manual(values = essex_palette)
# Parse the observation date and derive a "YYYY-MM" key that matches the
# format of the crime data's date column.
temp2023$Date <- as.Date(temp2023$Date, format = "%Y-%m-%d")
temp2023$month <- format(temp2023$Date, "%Y-%m")

# Mean of the daily average temperature per month. Columns are named at
# creation (date, avg_temp) instead of being renamed by position afterwards;
# na.rm = TRUE keeps one missing daily reading from turning a whole month
# (and therefore the correlation) into NA.
avgtemp_per_month <- temp2023 %>%
  group_by(date = month) %>%
  summarize(avg_temp = mean(TemperatureCAvg, na.rm = TRUE))

# Join monthly temperature with monthly offence counts and correlate them.
temp_crime <- merge(avgtemp_per_month, crime23_year, by = "date")
correlation <- cor(temp_crime$count, temp_crime$avg_temp)

# Scatter plot with a fitted straight line. paste0() is used for the title
# because paste() inserts a separator space around the number, which
# rendered as "Coefficient:  0.24 )".
correlplot <- ggplot(temp_crime, aes(x = count, y = avg_temp)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE, colour = "red") +
  labs(
    title = paste0(
      "Correlation Plot (Correlation Coefficient: ",
      round(correlation, 2),
      ")"
    ),
    x = "Count of Crimes",
    y = "Average Temperature"
  )
correlplot
## `geom_smooth()` using formula = 'y ~ x'
The correlation coefficient of 0.24 indicates a weak positive linear relationship between the average monthly temperature and the number of crimes.
library(leaflet)
## Warning: package 'leaflet' was built under R version 4.3.3
# Offences recorded in June 2023.
crime23_june <- crime23 %>% filter(date %in% c("2023-06"))

# Data for the base layer. The earlier group_by() was dropped: with no
# summarise step it did not change the rows or columns that are plotted.
map <- crime23_june
map$long <- as.numeric(map$long)
map$lat <- as.numeric(map$lat)

# Interactive map: every June offence as a default marker, with violent
# crimes overlaid in red. Coordinates are mapped explicitly so leaflet does
# not have to guess the columns (and print "Assuming ..." messages).
# NOTE(review): the red overlay is drawn from crime23_summer, i.e. the whole
# summer subset, whereas the base layer is June only. Confirm whether the
# overlay should use crime23_june instead.
m <- leaflet(map) %>%
  addTiles() %>%
  addCircleMarkers(lng = ~long, lat = ~lat, popup = ~category) %>%
  addCircleMarkers(
    data = crime23_summer[crime23_summer$category == "violent-crime", ],
    lng = ~long,
    lat = ~lat,
    group = "violent-crime",
    color = "red",
    popup = ~category
  )
## Assuming "long" and "lat" are longitude and latitude, respectively
## Assuming "long" and "lat" are longitude and latitude, respectively
m
library(lubridate)
library(xts)
## Warning: package 'xts' was built under R version 4.3.3
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 4.3.2
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
## ######################### Warning from 'xts' package ##########################
## # #
## # The dplyr lag() function breaks how base R's lag() function is supposed to #
## # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or #
## # source() into this session won't work correctly. #
## # #
## # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
## # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop #
## # dplyr from breaking base R's lag() function. #
## # #
## # Code in packages is not affected. It's protected by R's namespace mechanism #
## # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning. #
## # #
## ###############################################################################
##
## Attaching package: 'xts'
## The following object is masked from 'package:leaflet':
##
## addLegend
## The following objects are masked from 'package:dplyr':
##
## first, last
library(dplyr)
library(lubridate)
library(ggplot2)
library(forecast)
## Warning: package 'forecast' was built under R version 4.3.3
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo